In [4]:
import pandas as pd
In [5]:
df=pd.read_excel(r"D:\pitf.world.20160101-20200229.xlsx\pitf.world.20160101-20200229.xlsx",skiprows=2)
In [6]:
df
Out[6]:
| Event Type | Campaign Identifier | Event Reporting | Start Day | Start Month | Start Year | End Day | End Month | End Year | Country | ... | Link | Primary Source Type | Primary Source | Secondary Source Type | Secondary Source | Contesting Source Type | Contesting Source | Citation | Comments | Coder | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Incident | NaN | Eyewitness Account | 6 | 1 | 2016 | 99 | 99 | 9999 | AFG | ... | NaN | BBC | International | Afghan Islamic Press news agency, Peshawar | Local | NaN | NaN | BBCSAP0020160107ec170025t | NaN | PAS |
| 1 | Incident | NaN | Eyewitness Account | 17 | 1 | 2016 | 99 | 99 | 9999 | AFG | ... | NaN | BBC, AFP, Reuters, NYT | International | Shamshad TV, Kabul, Afghan Channel One (1TV), ... | Local | NaN | NaN | BBCMNF0020160117ec1h000rv, AFPR000020160117ec1... | NaN | PAS |
| 2 | Incident | NaN | Eyewitness Account | 20 | 1 | 2016 | 99 | 99 | 9999 | AFG | ... | NaN | BBC, AFP, Reuters, AP | International | Tolo TV, Kabul, Tolo News, Kabul, Voice of Jih... | Local | NaN | NaN | BBCMNF0020160120ec1k003e9, BBCMNF0020160120ec1... | NaN | PAS |
| 3 | Incident | NaN | Eyewitness Account | 3 | 1 | 2016 | 99 | 99 | 9999 | BDI | ... | NaN | BBC | International | Radio France Internationale, Paris | Local | NaN | NaN | BBCAP00020160104ec14000gq | NaN | PAS |
| 4 | Incident | NaN | Eyewitness Account | 15 | 1 | 2016 | 99 | 99 | 9999 | BFA | ... | NaN | AFP, Reuters, AP, CNN | International | NaN | NaN | NaN | NaN | AFPR000020160115ec1f00bcd, AFPR000020160116ec1... | NaN | PAS |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7451 | Campaign | NaN | Secondary Account | 25 | 2 | 2020 | 99 | 99 | 9999 | SYR | ... | NaN | BBC, AFP, AP, CNN | International | BBC Monitoring 25 Feb 20, BBC Monitoring in Ar... | Local | NaN | NaN | BBCMEP0020200225eg2p001h3, AFPR000020200225eg2... | Only region used for geo-coding. | BEB |
| 7452 | Incident | NaN | Eyewitness Account | 8 | 2 | 2020 | 99 | 99 | 9999 | THA | ... | NaN | AFP, CNN, Reuters, AP, NYT | International | NaN | NaN | NaN | NaN | AFPR000020200208eg28006y1, AFPR000020200208eg2... | NaN | BEB |
| 7453 | Incident | NaN | Secondary Account | 2 | 2 | 2020 | 99 | 99 | 9999 | TZA | ... | NaN | Reuters, BBC, All Africa | International | Mwananchi, Dar es Salaam, 1 Feb 20 | Local | NaN | NaN | LBA0000020200202eg22007k9, BBCMNF0020200202eg2... | NaN | BEB |
| 7454 | Incident | NaN | Eyewitness Account | 15 | 2 | 2020 | 99 | 99 | 9999 | YEM | ... | NaN | Reuters, BBC, AP, AFP | International | BBC Monitoring in Arabic 1156 gmt 15 Feb 20, B... | Local | NaN | NaN | LBA0000020200215eg2f00dw1, BBCMNF0020200215eg2... | Only district used for geo-coding. | BEB |
| 7455 | Incident | NaN | Secondary Account | 1 | 2 | 2020 | 99 | 99 | 9999 | ZAF | ... | NaN | AFP | International | NaN | NaN | NaN | NaN | AFPR000020200201eg21007y5 | NaN | BEB |
7456 rows × 73 columns
In [7]:
df.shape
Out[7]:
(7456, 73)
In [8]:
df.describe
Out[8]:
<bound method NDFrame.describe of Event Type Campaign Identifier Event Reporting Start Day \
0 Incident NaN Eyewitness Account 6
1 Incident NaN Eyewitness Account 17
2 Incident NaN Eyewitness Account 20
3 Incident NaN Eyewitness Account 3
4 Incident NaN Eyewitness Account 15
... ... ... ... ...
7451 Campaign NaN Secondary Account 25
7452 Incident NaN Eyewitness Account 8
7453 Incident NaN Secondary Account 2
7454 Incident NaN Eyewitness Account 15
7455 Incident NaN Secondary Account 1
Start Month Start Year End Day End Month End Year Country ... Link \
0 1 2016 99 99 9999 AFG ... NaN
1 1 2016 99 99 9999 AFG ... NaN
2 1 2016 99 99 9999 AFG ... NaN
3 1 2016 99 99 9999 BDI ... NaN
4 1 2016 99 99 9999 BFA ... NaN
... ... ... ... ... ... ... ... ...
7451 2 2020 99 99 9999 SYR ... NaN
7452 2 2020 99 99 9999 THA ... NaN
7453 2 2020 99 99 9999 TZA ... NaN
7454 2 2020 99 99 9999 YEM ... NaN
7455 2 2020 99 99 9999 ZAF ... NaN
Primary Source Type Primary Source \
0 BBC International
1 BBC, AFP, Reuters, NYT International
2 BBC, AFP, Reuters, AP International
3 BBC International
4 AFP, Reuters, AP, CNN International
... ... ...
7451 BBC, AFP, AP, CNN International
7452 AFP, CNN, Reuters, AP, NYT International
7453 Reuters, BBC, All Africa International
7454 Reuters, BBC, AP, AFP International
7455 AFP International
Secondary Source Type Secondary Source \
0 Afghan Islamic Press news agency, Peshawar Local
1 Shamshad TV, Kabul, Afghan Channel One (1TV), ... Local
2 Tolo TV, Kabul, Tolo News, Kabul, Voice of Jih... Local
3 Radio France Internationale, Paris Local
4 NaN NaN
... ... ...
7451 BBC Monitoring 25 Feb 20, BBC Monitoring in Ar... Local
7452 NaN NaN
7453 Mwananchi, Dar es Salaam, 1 Feb 20 Local
7454 BBC Monitoring in Arabic 1156 gmt 15 Feb 20, B... Local
7455 NaN NaN
Contesting Source Type Contesting Source \
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
... ... ...
7451 NaN NaN
7452 NaN NaN
7453 NaN NaN
7454 NaN NaN
7455 NaN NaN
Citation \
0 BBCSAP0020160107ec170025t
1 BBCMNF0020160117ec1h000rv, AFPR000020160117ec1...
2 BBCMNF0020160120ec1k003e9, BBCMNF0020160120ec1...
3 BBCAP00020160104ec14000gq
4 AFPR000020160115ec1f00bcd, AFPR000020160116ec1...
... ...
7451 BBCMEP0020200225eg2p001h3, AFPR000020200225eg2...
7452 AFPR000020200208eg28006y1, AFPR000020200208eg2...
7453 LBA0000020200202eg22007k9, BBCMNF0020200202eg2...
7454 LBA0000020200215eg2f00dw1, BBCMNF0020200215eg2...
7455 AFPR000020200201eg21007y5
Comments Coder
0 NaN PAS
1 NaN PAS
2 NaN PAS
3 NaN PAS
4 NaN PAS
... ... ...
7451 Only region used for geo-coding. BEB
7452 NaN BEB
7453 NaN BEB
7454 Only district used for geo-coding. BEB
7455 NaN BEB
[7456 rows x 73 columns]>
In [9]:
df = df.rename(columns={
"Start Day": "start_day",
"Start Month": "start_month",
"Start Year": "start_year",
"Event Type": "event_type",
"Country": "country",
"Region": "region",
"District": "district",
"Deaths Number": "fatalities",
"Description": "summary"
})
In [10]:
df
Out[10]:
| event_type | Campaign Identifier | Event Reporting | start_day | start_month | start_year | End Day | End Month | End Year | country | ... | Link | Primary Source Type | Primary Source | Secondary Source Type | Secondary Source | Contesting Source Type | Contesting Source | Citation | Comments | Coder | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Incident | NaN | Eyewitness Account | 6 | 1 | 2016 | 99 | 99 | 9999 | AFG | ... | NaN | BBC | International | Afghan Islamic Press news agency, Peshawar | Local | NaN | NaN | BBCSAP0020160107ec170025t | NaN | PAS |
| 1 | Incident | NaN | Eyewitness Account | 17 | 1 | 2016 | 99 | 99 | 9999 | AFG | ... | NaN | BBC, AFP, Reuters, NYT | International | Shamshad TV, Kabul, Afghan Channel One (1TV), ... | Local | NaN | NaN | BBCMNF0020160117ec1h000rv, AFPR000020160117ec1... | NaN | PAS |
| 2 | Incident | NaN | Eyewitness Account | 20 | 1 | 2016 | 99 | 99 | 9999 | AFG | ... | NaN | BBC, AFP, Reuters, AP | International | Tolo TV, Kabul, Tolo News, Kabul, Voice of Jih... | Local | NaN | NaN | BBCMNF0020160120ec1k003e9, BBCMNF0020160120ec1... | NaN | PAS |
| 3 | Incident | NaN | Eyewitness Account | 3 | 1 | 2016 | 99 | 99 | 9999 | BDI | ... | NaN | BBC | International | Radio France Internationale, Paris | Local | NaN | NaN | BBCAP00020160104ec14000gq | NaN | PAS |
| 4 | Incident | NaN | Eyewitness Account | 15 | 1 | 2016 | 99 | 99 | 9999 | BFA | ... | NaN | AFP, Reuters, AP, CNN | International | NaN | NaN | NaN | NaN | AFPR000020160115ec1f00bcd, AFPR000020160116ec1... | NaN | PAS |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7451 | Campaign | NaN | Secondary Account | 25 | 2 | 2020 | 99 | 99 | 9999 | SYR | ... | NaN | BBC, AFP, AP, CNN | International | BBC Monitoring 25 Feb 20, BBC Monitoring in Ar... | Local | NaN | NaN | BBCMEP0020200225eg2p001h3, AFPR000020200225eg2... | Only region used for geo-coding. | BEB |
| 7452 | Incident | NaN | Eyewitness Account | 8 | 2 | 2020 | 99 | 99 | 9999 | THA | ... | NaN | AFP, CNN, Reuters, AP, NYT | International | NaN | NaN | NaN | NaN | AFPR000020200208eg28006y1, AFPR000020200208eg2... | NaN | BEB |
| 7453 | Incident | NaN | Secondary Account | 2 | 2 | 2020 | 99 | 99 | 9999 | TZA | ... | NaN | Reuters, BBC, All Africa | International | Mwananchi, Dar es Salaam, 1 Feb 20 | Local | NaN | NaN | LBA0000020200202eg22007k9, BBCMNF0020200202eg2... | NaN | BEB |
| 7454 | Incident | NaN | Eyewitness Account | 15 | 2 | 2020 | 99 | 99 | 9999 | YEM | ... | NaN | Reuters, BBC, AP, AFP | International | BBC Monitoring in Arabic 1156 gmt 15 Feb 20, B... | Local | NaN | NaN | LBA0000020200215eg2f00dw1, BBCMNF0020200215eg2... | Only district used for geo-coding. | BEB |
| 7455 | Incident | NaN | Secondary Account | 1 | 2 | 2020 | 99 | 99 | 9999 | ZAF | ... | NaN | AFP | International | NaN | NaN | NaN | NaN | AFPR000020200201eg21007y5 | NaN | BEB |
7456 rows × 73 columns
In [11]:
# Convert DMS (Degrees, Minutes, Seconds) + Direction into Decimal coordinates
def dms_to_decimal(deg, min_, sec, direction):
try:
dec = deg + min_ / 60 + sec / 3600
if direction in ['S', 'W']:
dec = -dec
return dec
except:
return np.nan
In [12]:
# Apply for latitude and longitude
df['latitude'] = df.apply(lambda row: dms_to_decimal(
row.get('Degrees'), row.get('Minutes'), row.get('Seconds'), row.get('Direction')), axis=1)
df['longitude'] = df.apply(lambda row: dms_to_decimal(
row.get('Degrees.1'), row.get('Minutes.1'), row.get('Seconds.1'), row.get('Direction.1')), axis=1)
# Create full event date from day, month, year
df['event_date'] = pd.to_datetime(dict(
year=df['start_year'], month=df['start_month'], day=df['start_day']), errors='coerce')
In [13]:
df
Out[13]:
| event_type | Campaign Identifier | Event Reporting | start_day | start_month | start_year | End Day | End Month | End Year | country | ... | Secondary Source Type | Secondary Source | Contesting Source Type | Contesting Source | Citation | Comments | Coder | latitude | longitude | event_date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Incident | NaN | Eyewitness Account | 6 | 1 | 2016 | 99 | 99 | 9999 | AFG | ... | Afghan Islamic Press news agency, Peshawar | Local | NaN | NaN | BBCSAP0020160107ec170025t | NaN | PAS | 36.934167 | 66.113889 | 2016-01-06 |
| 1 | Incident | NaN | Eyewitness Account | 17 | 1 | 2016 | 99 | 99 | 9999 | AFG | ... | Shamshad TV, Kabul, Afghan Channel One (1TV), ... | Local | NaN | NaN | BBCMNF0020160117ec1h000rv, AFPR000020160117ec1... | NaN | PAS | 34.434167 | 70.447778 | 2016-01-17 |
| 2 | Incident | NaN | Eyewitness Account | 20 | 1 | 2016 | 99 | 99 | 9999 | AFG | ... | Tolo TV, Kabul, Tolo News, Kabul, Voice of Jih... | Local | NaN | NaN | BBCMNF0020160120ec1k003e9, BBCMNF0020160120ec1... | NaN | PAS | 34.533333 | 69.166667 | 2016-01-20 |
| 3 | Incident | NaN | Eyewitness Account | 3 | 1 | 2016 | 99 | 99 | 9999 | BDI | ... | Radio France Internationale, Paris | Local | NaN | NaN | BBCAP00020160104ec14000gq | NaN | PAS | -3.411944 | 29.375833 | 2016-01-03 |
| 4 | Incident | NaN | Eyewitness Account | 15 | 1 | 2016 | 99 | 99 | 9999 | BFA | ... | NaN | NaN | NaN | NaN | AFPR000020160115ec1f00bcd, AFPR000020160116ec1... | NaN | PAS | 12.357222 | -1.535278 | 2016-01-15 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7451 | Campaign | NaN | Secondary Account | 25 | 2 | 2020 | 99 | 99 | 9999 | SYR | ... | BBC Monitoring 25 Feb 20, BBC Monitoring in Ar... | Local | NaN | NaN | BBCMEP0020200225eg2p001h3, AFPR000020200225eg2... | Only region used for geo-coding. | BEB | 35.766667 | 36.666667 | 2020-02-25 |
| 7452 | Incident | NaN | Eyewitness Account | 8 | 2 | 2020 | 99 | 99 | 9999 | THA | ... | NaN | NaN | NaN | NaN | AFPR000020200208eg28006y1, AFPR000020200208eg2... | NaN | BEB | 14.970556 | 102.101944 | 2020-02-08 |
| 7453 | Incident | NaN | Secondary Account | 2 | 2 | 2020 | 99 | 99 | 9999 | TZA | ... | Mwananchi, Dar es Salaam, 1 Feb 20 | Local | NaN | NaN | LBA0000020200202eg22007k9, BBCMNF0020200202eg2... | NaN | BEB | 3.350000 | 37.333333 | 2020-02-02 |
| 7454 | Incident | NaN | Eyewitness Account | 15 | 2 | 2020 | 99 | 99 | 9999 | YEM | ... | BBC Monitoring in Arabic 1156 gmt 15 Feb 20, B... | Local | NaN | NaN | LBA0000020200215eg2f00dw1, BBCMNF0020200215eg2... | Only district used for geo-coding. | BEB | 16.095833 | 44.566667 | 2020-02-15 |
| 7455 | Incident | NaN | Secondary Account | 1 | 2 | 2020 | 99 | 99 | 9999 | ZAF | ... | NaN | NaN | NaN | NaN | AFPR000020200201eg21007y5 | NaN | BEB | -26.202222 | 28.043611 | 2020-02-01 |
7456 rows × 76 columns
In [14]:
# Keep only important columns
df_clean=df[['event_type', 'event_date', 'country', 'region', 'district',
'fatalities', 'latitude', 'longitude', 'summary']]
# Remove rows without coordinates
df_clean = df_clean.dropna(subset=['latitude', 'longitude'])
Out[14]:
(7455, 9)
In [15]:
# Convert fatalities to numeric
df_clean['fatalities'] = pd.to_numeric(df_clean['fatalities'], errors='coerce')
# Drop rows with missing or zero fatalities
df_clean = df_clean[df_clean['fatalities'] > 0]
# Check the shape
df_clean.shape
Out[15]:
(7455, 9)
In [16]:
# Total events
total_events = df_clean.shape[0]
# Unique countries affected
total_countries = df_clean['country'].nunique()
# Total fatalities
total_fatalities = df_clean['fatalities'].sum()
# Date range
date_min = df_clean['event_date'].min()
date_max = df_clean['event_date'].max()
# Print all
print("Total Atrocity Events:", total_events)
print("Total Countries Affected:", total_countries)
print("Total Civilian Fatalities:", int(total_fatalities))
print("Data Time Range:", date_min.date(), "to", date_max.date())
Total Atrocity Events: 7455 Total Countries Affected: 98 Total Civilian Fatalities: 65954 Data Time Range: 2016-01-01 to 2020-12-31
In [17]:
import matplotlib.pyplot as plt
import seaborn as sns
# Group by country
top_countries = df_clean.groupby('country')['fatalities'].sum().sort_values(ascending=False).head(10)
# Plot
plt.figure(figsize=(10, 6))
sns.barplot(x=top_countries.values, y=top_countries.index, palette='Reds_r')
plt.title("Top 10 Countries by Civilian Fatalities (2016–2020)")
plt.xlabel("Total Fatalities")
plt.ylabel("Country")
plt.tight_layout()
plt.show()
C:\Users\ADMIN\AppData\Local\Temp\ipykernel_2116\4069373457.py:9: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect. sns.barplot(x=top_countries.values, y=top_countries.index, palette='Reds_r')
In [18]:
# Extract year
df_clean['year'] = df_clean['event_date'].dt.year
# Group by year
yearly_deaths = df_clean.groupby('year')['fatalities'].sum()
# Plot
plt.figure(figsize=(8,5))
sns.lineplot(x=yearly_deaths.index, y=yearly_deaths.values, marker='o', color='darkred')
plt.title("Year-wise Fatalities")
plt.xlabel("Year")
plt.ylabel("Total Fatalities")
plt.grid(True)
plt.tight_layout()
plt.show()
In [20]:
pip install folium
Collecting folium Downloading folium-0.20.0-py2.py3-none-any.whl.metadata (4.2 kB) Collecting branca>=0.6.0 (from folium) Downloading branca-0.8.1-py3-none-any.whl.metadata (1.5 kB) Requirement already satisfied: jinja2>=2.9 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from folium) (3.1.4) Requirement already satisfied: numpy in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from folium) (2.2.5) Requirement already satisfied: requests in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from folium) (2.32.3) Requirement already satisfied: xyzservices in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from folium) (2024.9.0) Requirement already satisfied: MarkupSafe>=2.0 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from jinja2>=2.9->folium) (3.0.2) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from requests->folium) (3.4.0) Requirement already satisfied: idna<4,>=2.5 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from requests->folium) (3.10) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from requests->folium) (2.2.3) Requirement already satisfied: certifi>=2017.4.17 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from requests->folium) (2024.8.30) Downloading folium-0.20.0-py2.py3-none-any.whl (113 kB) Downloading branca-0.8.1-py3-none-any.whl (26 kB) Installing collected packages: branca, folium Successfully installed branca-0.8.1 folium-0.20.0 Note: you may need to restart the kernel to use updated packages.
[notice] A new release of pip is available: 24.3.1 -> 25.1.1 [notice] To update, run: python.exe -m pip install --upgrade pip
In [23]:
import folium
from folium.plugins import MarkerCluster
# Create base map
map_base = folium.Map(
location=[df_clean['latitude'].median(), df_clean['longitude'].median()],
zoom_start=2
)
# Create marker cluster
marker_cluster = MarkerCluster().add_to(map_base)
# Use .iterrows() instead of .loc[]
for _, row in df_clean.iterrows():
try:
folium.Marker(
location=[row['latitude'], row['longitude']],
popup=f"{row['country']} – {int(row['fatalities'])} deaths\n{str(row['summary'])[:100]}",
icon=folium.Icon(color='red', icon='info-sign')
).add_to(marker_cluster)
except:
pass # skip rows with bad data
# Show the map
map_base
Out[23]:
Make this Notebook Trusted to load map: File -> Trust Notebook